//This do file creates the following balanced panels:
//1. Produce 2_firm_mn_dist_balanced.dta - this has data at firm-mn-dist level.
//2. Produce 3_firm_mn_dist.dta - this has data at firm-mn-dist level.
 

clear
set matsize 100
set more off 
# delimit;

* Set directory; 

cd "~/Desktop/migrec_replication/do/";

*******************************************************************************;
* Load and merge data;
	
	use ../dta_secure/agency_ratings.dta, clear;
	
	
		expand 300; //25 districts*12years
		bys agency_id: gen count_id = _n;
		egen group = cut(count), group(12);
		g departure_year = group+ 2005;
		bys agency_id departure_year: gen vil_dis_code = _n;
		replace vil_dis_code = 26 if vil_dis_code == 25;
		drop group count_id;
		
		expand 12; //for 12 months 
		bys agency_id departure_year vil_dis_code: gen departure_month = _n;
		
	
	
	/* Now, we merge migrant data in which complaints are recorded by departure year. 
	only drop using data in the first merge (_m_migs) to ensure that the balanced nature 
	of the master dataset is preserved*/
		
		tostring departure_year departure_month, replace;
		merge 1:1 agency_id vil_dis_code departure_year departure_month using ../dta_secure/1_firm_mn_district, gen(_m_migs);		
		drop if _m_migs == 2; //drop if no agency data available
	
	/* tag firms where data on migs available but not necessarily in all districts. 
	this allows me to only drop agencies from master where we don't have any data; */
		bys agency_id: egen tag = max(_m_migs); 
		keep if tag == 3;
		drop tag;
		
	
** merge in complaints data by incident year at the fmd level;  	
 		merge 1:1 agency_id departure_year departure_month vil_dis_code 
 			using ../dta_secure/fmd_inci_comp.dta, gen(inci_complaints_merge);
		destring departure_year departure_month agency_license_yr agency_license_month, replace;
		
** keep only if complaint is leveled against an agency in the panel dataset or obs where there is no complaint match ;
		keep  if inci_complaints_merge == 1 | inci_complaints_merge == 3; 
		


** merge in complaints data by filing year at the fmd level; 
 	
	merge 1:1 agency_id departure_year departure_month vil_dis_code using ../dta_secure/fmd_filed_comp.dta, gen(filed_complaints_merge);
	destring departure_year departure_month agency_license_yr, replace;
		
* keep only if complaint is leveled against an agency in the panel dataset or obs where there is no complaint match ;
	
	keep  if filed_complaints_merge == 1 | filed_complaints_merge == 3; 

*******************************************************************************;
** Clean data ;

	sort agency_id departure_year departure_month vil_dis_code;
	
** Insert 0 if firm is active according to license year but has a missing value; 
		
* we only want to recode migrant-related info (e.g. migrants, complaints);
	ds agency_* license_no departure_year departure_month vil_dis_code _m_* *merge* *_2012 *_2014,not;
	ds `r(varlist)', has(type numeric);
	
	foreach x in `r(varlist)' {;
		replace `x' = 0 if departure_year > agency_license_yr & `x'==.;
		replace `x' = 0 if (departure_year == agency_license_yr) & (departure_month>agency_license_month) & `x'==.; 
	};

** save balanced file; 	
		
	preserve;
		rename vil_dis_code district; 
		rename departure_year year; 
		rename departure_month month; 					
		save ../dta_secure/2_firm_mn_dist_balanced.dta, replace;
	restore;

** merge in job orders at the firm-year level (when analysis must be run at FMD level);
 	
 	rename departure_year year;
	rename departure_month month;

	merge m:1 license_no year using ../dta_secure/firm_joborder_yr, gen(firm_joborder_merge);

	rename vil_dis_code district; 

* save; 
	
	save ../dta_secure/3_firm_mn_dist.dta, replace;
	
	
